import numpy as np
import pandas as pd
import itertools
from pprint import pprint
import matplotlib.pyplot as plt
import plotly.express as px
from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.utils.notebook.plotting import render, init_notebook_plotting
from ax.service.managed_loop import optimize
from ax.metrics.branin import branin
from ax.utils.measurement.synthetic_functions import hartmann6
# Set up Ax's notebook plotting support (the log line below shows it injects
# the Plotly library into the cell output).
init_notebook_plotting()
[INFO 05-03 13:16:45] ax.utils.notebook.plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.
# Load the iperf3 results CSV, report its (rows, columns) shape, and preview
# the first rows as the cell's displayed value.
df = pd.read_csv('iperf3-tcp-rmem-wmem-results-correct-header.csv')
print(df.shape)
df.head()
(210, 11)
| trial num | timestamp | results 1 | results 2 | results 3 | net.ipv4.tcp_rmem min | net.ipv4.tcp_rmem default | net.ipv4.tcp_rmem max | net.ipv4.tcp_wmem default | net.ipv4.tcp_wmem default.1 | net.ipv4.tcp_wmem max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 220422192422 | 9162 | 9302 | 9292 | 4096 | 87380 | 90000 | 4096 | 16384 | 20000 |
| 1 | 2 | 220422192533 | 16617 | 15727 | 16737 | 4096 | 87380 | 90000 | 4096 | 16384 | 40000 |
| 2 | 3 | 220422192735 | 21112 | 20546 | 21637 | 4096 | 87380 | 90000 | 4096 | 16384 | 75000 |
| 3 | 4 | 220422192907 | 20332 | 20126 | 19944 | 4096 | 87380 | 90000 | 4096 | 16384 | 100000 |
| 4 | 5 | 220422193019 | 20497 | 21210 | 21081 | 4096 | 87380 | 90000 | 4096 | 16384 | 150000 |
Note: the max columns are NOT unique
Note: the results columns have dtype `object` because some entries contain the string `timeout` instead of a number. Rows containing a timeout are removed below.
# Drop every row in which any "results ..." column holds the string
# 'timeout', then cast those columns to float. The filter runs one column at
# a time so the shape can be reported after each pass.
results_cols = [name for name in df if name.startswith('results')]
print(results_cols)

df2 = df.copy()
print(df2.shape)
for col in results_cols:
    # .copy() gives an independent frame, so the assignment below does not
    # write into a slice of the previous one.
    df2 = df2.loc[df2[col] != 'timeout'].copy()
    df2[col] = df2[col].astype('float')
    print(f'Cleaning {col}: {df2.shape}')

# Per-row mean and standard error of the mean across the repeated results.
df2['results_mean'] = df2[results_cols].mean(axis=1)
df2['results_sem'] = df2[results_cols].sem(axis=1)
df2.head()
['results 1', 'results 2', 'results 3'] (210, 11) Cleaning results 1: (209, 11) Cleaning results 2: (209, 11) Cleaning results 3: (209, 11)
| trial num | timestamp | results 1 | results 2 | results 3 | net.ipv4.tcp_rmem min | net.ipv4.tcp_rmem default | net.ipv4.tcp_rmem max | net.ipv4.tcp_wmem default | net.ipv4.tcp_wmem default.1 | net.ipv4.tcp_wmem max | results_mean | results_sem | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 220422192422 | 9162.0 | 9302.0 | 9292.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 20000 | 9252.000000 | 45.092498 |
| 1 | 2 | 220422192533 | 16617.0 | 15727.0 | 16737.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 40000 | 16360.333333 | 318.555769 |
| 2 | 3 | 220422192735 | 21112.0 | 20546.0 | 21637.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 75000 | 21098.333333 | 315.018694 |
| 3 | 4 | 220422192907 | 20332.0 | 20126.0 | 19944.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 100000 | 20134.000000 | 112.077354 |
| 4 | 5 | 220422193019 | 20497.0 | 21210.0 | 21081.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 150000 | 20929.333333 | 219.350810 |
# 2-D view: the two "max" tunables on the axes, mean result as colour.
fig = px.scatter(
    data_frame=df2,
    x='net.ipv4.tcp_rmem max',
    y='net.ipv4.tcp_wmem max',
    color='results_mean',
)
fig.show()

# 3-D view of the same data, with the mean result also on the z axis.
fig = px.scatter_3d(
    data_frame=df2,
    x='net.ipv4.tcp_rmem max',
    y='net.ipv4.tcp_wmem max',
    z='results_mean',
    color='results_mean',
)
fig.show()
The log difference was chosen because we want a metric that is sensitive whenever a tunable takes low values. Taking the difference $\log(\text{rmem max}) - \log(\text{wmem max})$ ensures that the two limits are mapped to opposite infinities: rmem max $\rightarrow$ 0 goes to $-\infty$ and wmem max $\rightarrow$ 0 goes to $+\infty$. This has not been very carefully thought out; it is just a simple transformation from a 2D space to a 1D space.
# Collapse the two "max" tunables onto a single axis:
#     log(rmem max) - log(wmem max)
# Earlier alternative, kept for reference (plain ratio):
#     df2['tune_ratio_rmem_over_wmem'] = df2['net.ipv4.tcp_rmem max'] / df2['net.ipv4.tcp_wmem max']
df2['logdiff_rmem_wmem'] = (
    np.log(df2['net.ipv4.tcp_rmem max'])
    - np.log(df2['net.ipv4.tcp_wmem max'])
)

# Mean result against the combined 1-D coordinate.
fig = px.scatter(
    data_frame=df2,
    x='logdiff_rmem_wmem',
    y='results_mean',
    color='results_mean',
)
fig.show()
df2.head()
| trial num | timestamp | results 1 | results 2 | results 3 | net.ipv4.tcp_rmem min | net.ipv4.tcp_rmem default | net.ipv4.tcp_rmem max | net.ipv4.tcp_wmem default | net.ipv4.tcp_wmem default.1 | net.ipv4.tcp_wmem max | results_mean | results_sem | tune_ratio_rmem_over_wmem | logdiff_rmem_wmem | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 220422192422 | 9162.0 | 9302.0 | 9292.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 20000 | 9252.000000 | 45.092498 | 4.50 | 1.504077 |
| 1 | 2 | 220422192533 | 16617.0 | 15727.0 | 16737.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 40000 | 16360.333333 | 318.555769 | 2.25 | 0.810930 |
| 2 | 3 | 220422192735 | 21112.0 | 20546.0 | 21637.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 75000 | 21098.333333 | 315.018694 | 1.20 | 0.182322 |
| 3 | 4 | 220422192907 | 20332.0 | 20126.0 | 19944.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 100000 | 20134.000000 | 112.077354 | 0.90 | -0.105361 |
| 4 | 5 | 220422193019 | 20497.0 | 21210.0 | 21081.0 | 4096 | 87380 | 90000 | 4096 | 16384 | 150000 | 20929.333333 | 219.350810 | 0.60 | -0.510826 |
# Lookup table: (rmem max, wmem max) -> mean result and its standard error.
df3 = (
    df2.set_index(['net.ipv4.tcp_rmem max', 'net.ipv4.tcp_wmem max'])
    [['results_mean', 'results_sem']]
)
def eval_func(params, df, tunable_cols):
    """Look up the pre-measured objective for one parameter combination.

    Args:
        params: Mapping from tunable name to the value to evaluate.
        df: DataFrame with ``results_mean`` and ``results_sem`` columns. It is
            indexed with a tuple of the tunable values, taken in the order
            given by ``tunable_cols``.
        tunable_cols: Names of the tunables, in the order used to build the
            lookup key.

    Returns:
        ``{'result': (mean, sem)}`` for the requested combination.

    Raises:
        ValueError: If the combination cannot be looked up in ``df``. The
            underlying lookup error is attached as ``__cause__``.
    """
    x = [params.get(name) for name in tunable_cols]
    key = tuple(x)
    # Only the lookups sit inside the try. ``Exception`` (rather than a bare
    # ``except``) keeps the "any lookup failure -> ValueError" contract
    # without swallowing KeyboardInterrupt/SystemExit.
    try:
        mean = df['results_mean'].loc[key]
        sem = df['results_sem'].loc[key]
    except Exception as err:
        raise ValueError(f"Cannot find key {x}") from err
    return {'result': (mean, sem)}
def objective(params):
    """Evaluate ``params`` by looking them up in the measured grid ``df3``.

    Args:
        params: Mapping with values for 'net.ipv4.tcp_rmem max' and
            'net.ipv4.tcp_wmem max'.

    Returns:
        The dict produced by ``eval_func`` for that combination.
    """
    return eval_func(params, df3, ['net.ipv4.tcp_rmem max', 'net.ipv4.tcp_wmem max'])


# Sanity check on a combination that appears in the data preview above.
objective({'net.ipv4.tcp_rmem max': 90000, 'net.ipv4.tcp_wmem max': 20000})
{'result': (9252.0, 45.09249752822895)}
def prepare_search_space(df, var_type='discrete'):
    """Build one search-space parameter dict per index level of ``df``.

    Args:
        df: DataFrame whose index entries are tuples with one value per index
            level; the level names become the parameter names.
        var_type: ``'discrete'`` emits ``'choice'`` parameters listing the
            sorted unique values of each level. ``'range'`` emits ``'range'``
            parameters bounded by each level's minimum and maximum.

    Returns:
        A list of parameter dicts, in index-level order.

    Raises:
        ValueError: If ``var_type`` is neither ``'discrete'`` nor ``'range'``.
    """
    if var_type not in ('discrete', 'range'):
        raise ValueError('var_type should be "discrete" or "range"')

    index = df.index
    space = []
    for level, name in enumerate(index.names):
        # Values of this level across every index entry.
        level_values = [entry[level] for entry in index]
        if var_type == 'range':
            spec = {
                'name': name,
                'type': 'range',
                'bounds': [np.min(level_values), np.max(level_values)],
                'value_type': 'float',
                'log_scale': False,
            }
        else:
            spec = {
                'name': name,
                'type': 'choice',
                'values': list(np.unique(level_values)),
                'value_type': 'float',
                'log_scale': False,
            }
        space.append(spec)
    return space
# Run Ax's managed optimization loop over the discrete grid, maximizing the
# 'result' metric returned by `objective`, for 30 trials.
search_space = prepare_search_space(df3, var_type='discrete')
best_params, values, exp, model = optimize(
    parameters=search_space,
    evaluation_function=objective,
    experiment_name='iperf3_discrete',
    objective_name='result',
    minimize=False,
    total_trials=30,
)
/home/sanjay/Downloads/venv_pytorch/lib64/python3.9/site-packages/ax/core/parameter.py:467: UserWarning: `is_ordered` is not specified for `ChoiceParameter` "net.ipv4.tcp_rmem max". Defaulting to `True` for parameters of `ParameterType` FLOAT. To override this behavior (or avoid this warning), specify `is_ordered` during `ChoiceParameter` construction. /home/sanjay/Downloads/venv_pytorch/lib64/python3.9/site-packages/ax/core/parameter.py:467: UserWarning: `sort_values` is not specified for `ChoiceParameter` "net.ipv4.tcp_rmem max". Defaulting to `True` for parameters of `ParameterType` FLOAT. To override this behavior (or avoid this warning), specify `sort_values` during `ChoiceParameter` construction. /home/sanjay/Downloads/venv_pytorch/lib64/python3.9/site-packages/ax/core/parameter.py:467: UserWarning: `is_ordered` is not specified for `ChoiceParameter` "net.ipv4.tcp_wmem max". Defaulting to `True` for parameters of `ParameterType` FLOAT. To override this behavior (or avoid this warning), specify `is_ordered` during `ChoiceParameter` construction. /home/sanjay/Downloads/venv_pytorch/lib64/python3.9/site-packages/ax/core/parameter.py:467: UserWarning: `sort_values` is not specified for `ChoiceParameter` "net.ipv4.tcp_wmem max". Defaulting to `True` for parameters of `ParameterType` FLOAT. To override this behavior (or avoid this warning), specify `sort_values` during `ChoiceParameter` construction. 
[INFO 05-03 13:52:24] ax.service.utils.instantiation: Created search space: SearchSpace(parameters=[ChoiceParameter(name='net.ipv4.tcp_rmem max', parameter_type=FLOAT, values=[90000.0, 100000.0, 150000.0, 200000.0, 300000.0, 500000.0, 750000.0, 1000000.0, 1500000.0, 2000000.0, 4000000.0, 6000000.0, 8000000.0, 10000000.0], is_ordered=True, sort_values=True), ChoiceParameter(name='net.ipv4.tcp_wmem max', parameter_type=FLOAT, values=[20000.0, 40000.0, 75000.0, 100000.0, 150000.0, 200000.0, 300000.0, 500000.0, 750000.0, 1000000.0, 1500000.0, 2000000.0, 4000000.0, 6000000.0, 8000000.0], is_ordered=True, sort_values=True)], parameter_constraints=[]). [INFO 05-03 13:52:24] ax.modelbridge.dispatch_utils: Using Bayesian optimization since there are more ordered parameters than there are categories for the unordered categorical parameters. [INFO 05-03 13:52:24] ax.modelbridge.dispatch_utils: Using Bayesian Optimization generation strategy: GenerationStrategy(name='Sobol+GPEI', steps=[Sobol for 5 trials, GPEI for subsequent trials]). Iterations after 5 will take longer to generate due to model-fitting. [INFO 05-03 13:52:24] ax.service.managed_loop: Started full optimization with 30 steps. [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 1... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 2... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 3... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 4... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 5... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 6... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 7... [INFO 05-03 13:52:24] ax.service.managed_loop: Running optimization trial 8... [INFO 05-03 13:52:25] ax.service.managed_loop: Running optimization trial 9... [INFO 05-03 13:52:25] ax.service.managed_loop: Running optimization trial 10... 
[INFO 05-03 13:52:25] ax.service.managed_loop: Running optimization trial 11... [INFO 05-03 13:52:25] ax.service.managed_loop: Running optimization trial 12... [INFO 05-03 13:52:26] ax.service.managed_loop: Running optimization trial 13... [INFO 05-03 13:52:26] ax.service.managed_loop: Running optimization trial 14... [INFO 05-03 13:52:26] ax.service.managed_loop: Running optimization trial 15... [INFO 05-03 13:52:26] ax.service.managed_loop: Running optimization trial 16... [INFO 05-03 13:52:27] ax.service.managed_loop: Running optimization trial 17... [INFO 05-03 13:52:27] ax.service.managed_loop: Running optimization trial 18... [INFO 05-03 13:52:27] ax.service.managed_loop: Running optimization trial 19... [INFO 05-03 13:52:28] ax.service.managed_loop: Running optimization trial 20... [INFO 05-03 13:52:28] ax.service.managed_loop: Running optimization trial 21... [INFO 05-03 13:52:28] ax.service.managed_loop: Running optimization trial 22... [INFO 05-03 13:52:29] ax.service.managed_loop: Running optimization trial 23... [INFO 05-03 13:52:29] ax.service.managed_loop: Running optimization trial 24... [INFO 05-03 13:52:29] ax.service.managed_loop: Running optimization trial 25... [INFO 05-03 13:52:30] ax.service.managed_loop: Running optimization trial 26... [INFO 05-03 13:52:30] ax.service.managed_loop: Running optimization trial 27... [INFO 05-03 13:52:31] ax.service.managed_loop: Running optimization trial 28... [INFO 05-03 13:52:31] ax.service.managed_loop: Running optimization trial 29... [INFO 05-03 13:52:32] ax.service.managed_loop: Running optimization trial 30...
# Display the (rmem max, wmem max) -> results lookup table.
df3
| results_mean | results_sem | ||
|---|---|---|---|
| net.ipv4.tcp_rmem max | net.ipv4.tcp_wmem max | ||
| 90000 | 20000 | 9252.000000 | 45.092498 |
| 40000 | 16360.333333 | 318.555769 | |
| 75000 | 21098.333333 | 315.018694 | |
| 100000 | 20134.000000 | 112.077354 | |
| 150000 | 20929.333333 | 219.350810 | |
| ... | ... | ... | ... |
| 10000000 | 1500000 | 27832.333333 | 82.648520 |
| 2000000 | 27781.666667 | 145.861045 | |
| 4000000 | 28057.666667 | 294.949902 | |
| 6000000 | 28684.666667 | 445.499470 | |
| 8000000 | 28403.000000 | 57.073053 |
209 rows × 2 columns
# Compare the optimizer's answer with the best row actually present in the
# measured grid (shown as the cell's displayed value).
print(f'Optimal params: {best_params}\n')
print(f'Best value: {values}\n')
print("Best Grid Value:\n")
df3.loc[df3['results_mean'] == df3['results_mean'].max()]
Optimal params: {'net.ipv4.tcp_rmem max': 300000.0, 'net.ipv4.tcp_wmem max': 1500000.0}
Best value: ({'result': 33953.23848852991}, {'result': {'result': 2737.7784985501976}})
Best Grid Value:
| results_mean | results_sem | ||
|---|---|---|---|
| net.ipv4.tcp_rmem max | net.ipv4.tcp_wmem max | ||
| 300000 | 500000 | 34141.0 | 131.730786 |
# All measured mean results in ascending order, plotted against their rank.
plt.plot(np.array(df3['results_mean'].sort_values()))
plt.xlabel("Index")
plt.ylabel("results_mean")
plt.title("results_mean sorted")
Text(0.5, 1.0, 'results_mean sorted')
# Objective mean of every trial as a single-row 2-D array, plus the best
# value available anywhere in the measured grid for reference.
obj_vals = np.array([
    [trial.objective_mean for trial in exp.trials.values()],
])
true_optimum = df3['results_mean'].max()

# Running maximum of the objective over the trials.
trace_plot = optimization_trace_single_method(
    y=np.maximum.accumulate(obj_vals, axis=1),
    optimum=true_optimum,
    title='Maximum objective value so far',
    ylabel='results_mean',
)
render(trace_plot)

# Raw objective value of each trial.
trace_plot = optimization_trace_single_method(
    y=obj_vals,
    optimum=true_optimum,
    title='Objective value',
    ylabel='results_mean',
)
render(trace_plot)